library(plyr)
library(stringr)
library(magrittr)
library(tidyverse)
library(Matching)
library(broom)
library(readxl)
library(parallel)
library(doParallel)
library(vietnamcode)


# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this run.
# NOTE(review): this deletes every object in the calling environment; run the
# script in a fresh R session rather than sourcing it into ongoing work.
rm(list=ls())

# Root folder holding the input files and receiving the output files. Every
# path below is built as paste0(home, <file name>). Set the environment
# variable VNA_HOME to point the script at another copy of the replication
# folder; when it is unset or empty the original location is used.
home = Sys.getenv('VNA_HOME')
if(!nzchar(home)) {
  home = 'C:/Users/Jason/Dropbox/VNA_Responsiveness/Analysis/JOP-dataverse/'
}
# paste0() inserts no separator, so make sure the folder ends with one.
if(!grepl(pattern='[/\\\\]$', x=home)) {
  home = paste0(home, '/')
}


# Lookup table used further down to translate province names: its columns
# province_name_diacritics and province_name are read by the mapvalues() calls.
# (Presumably shipped with the vietnamcode package loaded above.)
data('vietnamcode_data')

# Names of the columns of provincial-data.csv that are kept (next to Province)
# and standardised when df_province is built.
covariates_for_balance = c('fulltime',
                           'central_nominated',
                           'competitive',
                           'local_NA',
                           'delegation',
                           'papi_score',
                           'pci_score',
                           'ln_gdp_2014',
                           'pop_2014',
                           'gdpcap_2014',
                           'transfers_2016')


# Read one outcome workbook from `home` and prepare its Treatment column.
#
# file_name: name of an .xlsx file inside `home`.
# Returns the sheet as read by read_xlsx(), with Treatment converted to a
# factor whose levels are Control, Citizen, Firm (in that order). Any other
# Treatment value becomes NA under factor(), and rows with an NA Treatment
# are dropped.
read_outcomes = function(file_name) {
  paste0(home, file_name) %>%
    read_xlsx %>%
    mutate(Treatment=factor(x=Treatment,
                            levels=c('Control',
                                     'Citizen',
                                     'Firm'))) %>%
    subset(!is.na(Treatment))
}

# Survey outcomes additionally get a 0/1 indicator for a missing Q1 answer.
# It is computed row by row, so adding it after the row filter gives the same
# values and the same (last) column position as adding it before.
dv_survey = read_outcomes('survey-outcomes.xlsx') %>%
  mutate(Missing=as.integer(is.na(Q1)))
dv_pooled = read_outcomes('pooled-outcomes.xlsx')
dv_caucus = read_outcomes('caucus-outcomes.xlsx')
dv_query = read_outcomes('query-outcomes.xlsx')
dv_floor = read_outcomes('floor-outcomes.xlsx')


# Provincial covariates. A Province column is added by translating the raw
# `province` names through the vietnamcode lookup table (name with diacritics
# -> plain name); names that are not in the table are kept unchanged by
# mapvalues().
df_province_raw = paste0(home, 'provincial-data.csv') %>%
  read_csv %>%
  mutate(Province=mapvalues(x=province, 
                            from=vietnamcode_data$province_name_diacritics, 
                            to=vietnamcode_data$province_name))
# Rows whose raw name contains 'a - V' get the short label 'BRVT'
# (presumably Ba Ria - Vung Tau, which the lookup does not translate).
df_province_raw$Province[str_detect(string=df_province_raw$province, 
                                    pattern='a - V')] = 'BRVT'


# Keep Province plus the balance covariates (picked by column position) and
# standardise every covariate. scale() returns a one-column matrix, so
# as.vector() turns it back into a plain numeric column. A plain function is
# passed to mutate_at() instead of the deprecated funs() wrapper; the columns
# are still overwritten in place under their original names.
df_province = df_province_raw %>%
  dplyr::select(Province, match(x=covariates_for_balance, 
                         table=colnames(df_province_raw))) %>%
  mutate_at(vars(-Province), function(x) as.vector(scale(x)))
# Drop the tibble classes so the object is a bare data.frame.
class(df_province) = 'data.frame'


df_province_match = df_province %>% # Remove Hanoi, HCMC, Dong Nai
  filter(!(Province %in% c('Dong Nai','TP HCM','Ha Noi')))


# Membership list. Province is derived from the raw `province` names in the
# same way as for the provincial data and stored as character.
df_delegate_raw = paste0(home, '14th-VNA-membership.csv') %>%
  read_csv %>%
  mutate(Province=mapvalues(x=province, 
                               from=vietnamcode_data$province_name_diacritics, 
                               to=vietnamcode_data$province_name),
         Province=as.character(Province))
df_delegate_raw$Province[str_detect(string=df_delegate_raw$province, 
                                    pattern='a - V')] = 'BRVT'
# The row order fixed here matters: the manual ID patch below is positional.
df_delegate_raw = df_delegate_raw %>%
  arrange(Province, name)

# Look up each delegate's ID by searching for the name in dv_pooled$Name_VN:
#   no hit        -> NA
#   exactly one   -> that row's ID
#   several hits  -> 999 as an "ambiguous" marker
# NOTE(review): the name is used as a regular expression, not a literal
# string. This is left untouched because the manual patch below was written
# against the results of this exact matching rule.
df_delegate_raw$ID = llply(.data=df_delegate_raw$name, 
                           .fun=function(x) {
  found = str_detect(string=dv_pooled$Name_VN, 
                     pattern=x) %>%
    which
  if(length(found)<1) {
    NA
  } else if(length(found)==1) {
    dv_pooled$ID[found]
  } else {
    999L
  }
}, .inform=TRUE) %>% unlist

# Hand-entered IDs for the unmatched / ambiguous delegates, listed in the row
# order produced by arrange(Province, name) above. An NA entry leaves that
# delegate without an ID.
manual_ids = as.integer(c(59,57,157,257,149,201,233,134,133,455,449,115,262,260,197,120,33,369,215,88,298,174,312,422,418,419,69,393,137,221,109,253,NA,22,404,92,9,234,236,19,331,163,514,302,358,81))
needs_manual_id = is.na(df_delegate_raw$ID) | df_delegate_raw$ID==999L
# The assignment is purely positional. With a different number of rows to fill
# R would only warn and recycle or truncate, attaching IDs to the wrong
# delegates, so stop instead.
if(sum(needs_manual_id) != length(manual_ids)) {
  stop('Expected ', length(manual_ids), 
       ' delegates without a unique ID match but found ', 
       sum(needs_manual_id), 
       '; the manual ID list no longer lines up with the data.', 
       call.=FALSE)
}
df_delegate_raw$ID[needs_manual_id] = manual_ids
# Attach one outcome table to the delegate list.
#
# delegates: delegate data with an ID column.
# outcomes:  outcome data with ID, Province and Dosage columns.
# Province and Dosage are dropped from `outcomes` before merging. The merge
# keeps every delegate row (all.x=TRUE), and delegates without an ID are
# removed afterwards.
merge_outcomes = function(delegates, outcomes) {
  merge(x=delegates, 
        y=subset(outcomes, 
                 select=-c(Province,
                           Dosage)), 
        by='ID', 
        all.x=TRUE) %>%
    subset(!is.na(ID))
}

df_delegate_raw_survey = merge_outcomes(df_delegate_raw, dv_survey)
df_delegate_raw_pooled = merge_outcomes(df_delegate_raw, dv_pooled)
df_delegate_raw_caucus = merge_outcomes(df_delegate_raw, dv_caucus)
df_delegate_raw_query = merge_outcomes(df_delegate_raw, dv_query)
df_delegate_raw_floor = merge_outcomes(df_delegate_raw, dv_floor)


# Run the two helper scripts stored in `home`. They are expected to define
# one.randomization() and one.analysis(), which the parallel loops call.
source(file=paste0(home, 'data-prep-one-randomization.R'))
source(file=paste0(home, 'data-prep-one-analysis.R'))


# Number of randomizations / analyses to run, and a 6-worker cluster
# registered as the foreach backend.
iterations = 1e4
cl = makeCluster(6)
registerDoParallel(cl)

# set.seed() only seeds the master process, while the draws happen on the
# workers, so the worker random-number streams are seeded as well.
# NOTE(review): tasks are handed to workers as they become free, so which
# stream serves which iteration may still differ between runs; confirm
# whether exact run-to-run replication is required (e.g. via doRNG).
set.seed(31415)
clusterSetRNGStream(cl, iseed=31415)
timed = Sys.time()
# The finally clause shuts the workers down even when one of the loops fails,
# so no orphaned R processes are left behind. The assignments inside the
# braces are evaluated in the calling environment, as before.
tryCatch({
  # One call to one.randomization() per iteration; results are row-bound in
  # completion order.
  assignments = foreach(ii=icount(iterations), .packages=c('magrittr','plyr','dplyr','Matching'), 
                        .combine=rbind, .inorder=FALSE) %dopar% { one.randomization(df_province_match, df_province, df_delegate_raw) }
  # One call to one.analysis() per iteration, each given the full set of
  # assignments; results are row-bound in completion order.
  results = foreach(ii=icount(iterations), .packages=c('magrittr','plyr','dplyr','broom'),
                    .combine=rbind, .inorder=FALSE) %dopar% { one.analysis(assignments) }
  # Wall-clock time of the two loops.
  timed = diff(c(timed, Sys.time()))
}, finally=stopCluster(cl))


# Store the simulated assignments and the corresponding analysis results as
# .Rds files in `home`.
saveRDS(object=assignments, file=paste0(home, 'RI-assignments.Rds'))
saveRDS(object=results, file=paste0(home, 'RI-analyses.Rds'))
